* ---------------------------------------------------------------------------
* Session set-up: empty the memory, disable output paging, and declare the
* directory globals used by the rest of the script.
* ---------------------------------------------------------------------------
set more off
clear
* NOTE(review): recent Stata releases are understood to ignore matsize - confirm
set matsize 5000

* Input locations.
* lecindcsv is the folder the 2020 individual file is read from in section 1.
* lecind, lecindsas and lectabpas are not referenced in the visible part of
* this script; they are kept as declared.
global lecindcsv "\\intra\partages\au_amic2\SRCV_AVRIL2021\TABLES_INITIALES_AVRIL2021\INDIVIDUS"
global lecindsas "\\intra\partages\au_amic2\SRCV_AVRIL2021\TABLES_INITIALES_AVRIL2021\INDIVIDUS"
global lecind "\\intra\partages\au_amic2\SRCV\TABLES_INITIALES_FEV2021\INPUT\INDIVIDUS"
global lectabpas "\\intra\partages\au_amic2\SRCV_AVRIL2021\TABLES_INITIALES_AVRIL2021\SRCV_2020"

* Output locations.
* ecrind receives the intermediate and final .dta files,
* ecrstat receives the Excel control workbook written in section 8.
global ecrind "\\intra\partages\au_amic2\SRCV_AVRIL2021\BASES_FEV2022_V5\2020\INDIVIDUS\autres_var"
global ecrstat "\\intra\partages\au_amic2\SRCV_AVRIL2021\STAT_fev2022_V5\2020_new"

/* 1 - EXTRACTION OF THE SELECTED VARIABLES */
/* CSV file, 3rd delivery for 2020 */
/* NOTE(review): the original note said nbtsal was ignored for the moment
   because its number of categories changed, yet nbtsal is part of the keep
   list below - confirm which is intended */

forvalues an = 20/20 {
	* Read the individual-level table for the two-digit year from the input folder
	cd "$lecindcsv"
	use "individus_20`an'.dta", clear

	* Restrict the data to the identifiers and the variables of interest
	keep RB030 RB040 age arope sexe nbhpr CS24 nbtsal DIP11 naf ///
		occup situa actif actoccup PL080

	* Four-digit survey year, placed right after the two identifiers
	generate annee_SRCV = 2000 + `an'
	order RB030 RB040 annee_SRCV

	* Write the extract to the output folder
	cd "$ecrind"
	save "autres_indiv_`an'.dta", replace

	* Log the year processed and the number of observations kept
	display `an'
	count
}

/* 2 - RENAMING OF THE VARIABLES */
forvalues an = 20/20 {
	cd "$ecrind"
	use "autres_indiv_`an'.dta", clear

	* naf gets its long name; CS24, DIP11 and PL080 are switched to lower case
	rename (naf CS24 DIP11 PL080) (NAF2_REV2_1P cs24 dip11 pl080)

	save "$ecrind\autres_indiv_`an'.dta", replace
}


/* 3 - FORMATTING OF THE VARIABLES */

* 3-1 REPLACE THE "NA" STRINGS WITH BLANKS, THEN DESTRING
use "$ecrind\autres_indiv_20.dta", clear

* Blank out the literal NA marker in the three string variables that are
* cleaned here, tabulating each one right after its clean-up
foreach v in nbhpr pl080 NAF2_REV2_1P {
	replace `v' = "" if `v' == "NA"
	tabulate `v', missing
}

* For every variable to convert: tabulate by survey year, tabulate alone,
* convert to numeric in place, then tabulate again to check the result.
* The list order is the order in which the variables were originally handled.
foreach v in nbtsal actif actoccup cs24 dip11 nbhpr occup sexe situa pl080 {
	tabulate `v' annee_SRCV, missing
	tabulate `v', missing
	destring `v', replace
	tabulate `v', missing
}

save "$ecrind\autres_indiv_20.dta", replace


* 3-2 NUMERIC VERSIONS OF NAF2_REV2_1P AND arope
use "$ecrind\autres_indiv_20.dta", clear
tabulate NAF2_REV2_1P annee_SRCV, missing
tabulate NAF2_REV2_1P, missing

* Section letters A to S receive codes 1 to 19 and U receives 20 (the letter T
* is not part of the list); any other value stays missing
generate NAF2_REV2_1P_NEW = .
local code = 0
foreach letter in A B C D E F G H I J K L M N O P Q R S U {
	local ++code
	replace NAF2_REV2_1P_NEW = `code' if NAF2_REV2_1P == "`letter'"
}
tabulate NAF2_REV2_1P_NEW, missing

* Attach the letters back as value labels
label define L_NAF2_REV2_1P_NEW ///
	1 "A" 2 "B" 3 "C" 4 "D" 5 "E" 6 "F" 7 "G" 8 "H" 9 "I" 10 "J" ///
	11 "K" 12 "L" 13 "M" 14 "N" 15 "O" 16 "P" 17 "Q" 18 "R" 19 "S" 20 "U"
label values NAF2_REV2_1P_NEW L_NAF2_REV2_1P_NEW
tabulate NAF2_REV2_1P_NEW, missing

* arope: each three-character 0/1 string is stored as the number it spells
* (for instance "010" becomes 10); any other value stays missing
tabulate arope annee_SRCV, missing
tabulate arope, missing
generate arope_new = .
foreach pattern in 000 001 010 011 100 101 110 111 {
	replace arope_new = real("`pattern'") if arope == "`pattern'"
}
tabulate arope_new, missing

* Value labels restore the original three-character strings
label define L_arope_new ///
	0 "000" 1 "001" 10 "010" 11 "011" ///
	100 "100" 101 "101" 110 "110" 111 "111"
label values arope_new L_arope_new
tabulate arope_new, missing

save "$ecrind\autres_indiv_20.dta", replace


* 3-3 NUMERIC VARIABLE situa_new, HARMONISED OVER THE WHOLE PERIOD WITH 7 CATEGORIES
use "$ecrind\autres_indiv_20.dta", clear

* Copy of situa in which code 8 is folded into code 7; every other value,
* missing included, is carried over unchanged
generate situa_new = cond(situa == 8, 7, situa)

* Compare the original and the harmonised variable by survey year
tabulate situa annee_SRCV, missing
tabulate situa_new annee_SRCV, missing

save "$ecrind\autres_indiv_20.dta", replace

/* 4 - CREATION OF year */
use "$ecrind\autres_indiv_20.dta", clear

* year is the survey year minus one
* NOTE(review): presumably the reference year of the collected data - confirm
generate year = annee_SRCV - 1

save "$ecrind\autres_indiv_20.dta", replace


/* 5 - CREATION OF THE DIPLOME VARIABLE */
use "$ecrind\autres_indiv_20.dta", clear

* DIPLOME regroups the dip11 codes into 9 classes; dip11 values that are not
* listed below (missing included) leave DIPLOME missing
generate DIPLOME = .
replace DIPLOME = 1 if inlist(dip11, 10, 11)
replace DIPLOME = 2 if dip11 == 30
replace DIPLOME = 3 if dip11 == 31
replace DIPLOME = 4 if dip11 == 33
replace DIPLOME = 5 if dip11 == 41
replace DIPLOME = 6 if dip11 == 42
replace DIPLOME = 7 if dip11 == 50
replace DIPLOME = 8 if inlist(dip11, 60, 70)
replace DIPLOME = 9 if dip11 == 71

* Cross-tabulation to check the regrouping
tabulate dip11 DIPLOME, missing

* Final row order and column order, then write the definitive 2020 file
sort RB040 RB030 annee_SRCV
order RB040 RB030 annee_SRCV year age dip11 DIPLOME ///
	NAF2_REV2_1P NAF2_REV2_1P_NEW situa situa_new
save "$ecrind\autres_indiv_2020_def.dta", replace


/* Check of the number of observations */
* Each check reloads the definitive file and drops duplicates on one
* identifier; the drop is only there for the counts it reports and nothing
* is saved afterwards.

* Distinct values of RB040
use "$ecrind\autres_indiv_2020_def.dta", clear
duplicates drop RB040, force

* Distinct values of RB030
use "$ecrind\autres_indiv_2020_def.dta", clear
duplicates drop RB030, force

/* 6 - RENAMING OF RB030 TO LOWER CASE */
* The original header of this section repeated the DIPLOME title of section 5;
* the code only switches the identifier RB030 to lower case.
use "$ecrind\autres_indiv_2020_def.dta", clear
rename RB030, lower
save "$ecrind\autres_indiv_2020_def.dta", replace

/* 7 - CREATION OF nbtsal_new */
use "$ecrind\autres_indiv_2020_def.dta", clear

* nbtsal is carried over as is under its new name; no recoding is applied
rename nbtsal nbtsal_new
tabulate nbtsal_new, missing

save "$ecrind\autres_indiv_2020_def.dta", replace

/* 8 - CHECK OF THE CONSISTENCY OVER TIME OF THE CATEGORICAL VARIABLES */
* For each variable in check_vars, the one-way frequency table is written to
* its own sheet of the control workbook: values in column AN, frequencies in
* column AO and percentages in column AP, starting at row 3 under a header row.
use "$ecrind\autres_indiv_2020_def.dta", clear
cd "$ecrstat"

* Explicit list of the variables to export. The loop used to read its varlist
* from an undefined local macro placed in front of nbtsal_new, which expanded
* to nothing in a do-file run and could pick up stale content interactively.
* Add further variable names here to export more sheets.
local check_vars nbtsal_new

foreach var of varlist `check_vars' {
	* matcell stores the frequencies and matrow the distinct values
	tab `var', matcell(freq) matrow(names)

	* One sheet per variable; modify keeps the other sheets of the workbook
	putexcel set "$ecrstat\verif_var_individu_2020.xlsx", sheet("`var'") modify
	putexcel AN2=("2020") AO2=("Freq.") AP2=("Percent")

	* r(N) is the observation count left behind by the tabulate call above
	putexcel AN3=matrix(names) AO3=matrix(freq) AP3=matrix(100*(freq/r(N)))
}
